library(data.table)
library(tidyverse)
library(smacof)
library(DESeq2)
library(InteractionSet)
library(diffHic)
library(broom)
library(highcharter)
library(heatmaply)
library(plotly)
library(eulerr)
library(reactable)
library(knitr)
# ("data.table", "tidyverse", "smacof", "DESeq2", "InteractionSet", "diffHic",
# "broom", "highcharter", "heatmaply", "plotly", "eulerr")
# set your working directory
setwd("~/Documents/HGEN_663/extra/lec12")
load('lec12.RData')
There’s a number of ways through which we can examine the correlation matrix
tibble(s = names(hr)) %>%
mutate(type = factor(sub('_.*', '', s))) %>%
column_to_rownames('s') %>%
heatmaply(hr, row_side_colors = ., col_side_colors = .,
label_names = c("Sample 1", "Sample 2", "SCC"))
sim2diss(hr, method = 'corr') %>%
mds(ndim = 2) %>%
.$conf %>%
as.data.frame() %>%
rownames_to_column('s') %>%
mutate(type = sub('_.*', '', s)) %>%
plot_ly(x = ~D1, y = ~D2, color = ~type, text = ~s) %>%
add_markers(legendgroup = ~type) %>%
add_text(textposition = 'top left', showlegend = F, legendgroup = ~type)
For the contact probability decay curves and their derivatives that we’ve computed before, we can again just simply visualize them
# colors for each cell type
hclrs <- c(ESC = "#7cb5ec", NPC = "#434348")
# plot data
pd <- ps %>%
lapply(function(x) {
d <- rbindlist(x, idcol = 'samp')
ifelse('slope' %in% names(d), 'slope', 'balanced.avg') %>%
c('samp', 's_bp', .) %>%
d[, ., with = F] %>%
`colnames<-`(c('samp', 'x', 'y'))
}) %>%
rbindlist(idcol = 'kind') %>%
group_by(kind, samp) %>%
do(data = list_parse2(data.frame(.$x, .$y))) %>%
ungroup() %>%
separate(samp, c('ctype', 'rep'), '_', F, fill = 'right') %>%
#dplyr::filter(is.na(rep)) %>%
mutate(color = hclrs[ctype],
ctype = factor(ctype, names(hclrs))) %>%
arrange(ctype) %>%
mutate(name = samp,
id = ifelse(kind != 'log', tolower(name), NA),
linkedTo = ifelse(kind == 'log', tolower(name), NA),
yAxis = ifelse(kind == 'log', 0, 2))
highchart() %>%
# we use log scale for P(s) and linear for the slope (which was already taken in log space)
hc_yAxis_multiples(list(title = list(text = "<b>Contact probability</b>, P<sub>c</sub>(s)",
useHTML = TRUE),
type = "logarithmic",
labels = list(formatter = JS("function(){return this.value.toExponential(0);}")),
height = '45%', top = '0%', offset = 0),
list(height = '10%', top = '45%',
title = NULL,
plotLines = list(
list(color = "#a9a9a9", width = 2,
value = .5, zIndex = 1)
),
labels = list(enabled = F),
gridLineWidth = 0,
min = 0, max = 1),
list(type = "linear",
title = list(
text = "<b>Slope</b>, <sup>d</sup>⁄<sub>ds</sub> log P<sub>c</sub>(s)",
useHTML = TRUE
),
height = '45%', top = '55%', offset = 0)) %>%
# grab the data
hc_add_series_list(pd) %>%
# a bit of formatting
hc_xAxis(type = "logarithmic",
title = list(text = "<b>Genomic separation</b> (bp), s",
useHTML = T),
minorTickInterval = 'auto',
min = 1e4, max = 1e8) %>%
hc_tooltip(headerFormat = '<span style="font-size: 10px">{point.key:,.0f} bp</span><br/>') %>%
hc_chart(zoomType = "xy") %>%
hc_plotOptions(line = list(marker = list(enabled = F)))
Next we’ll visualize the first eigenvector (i.e., “compartment score”)
imageList <- list.files("~/Documents/HGEN_663/extra/lec12", pattern= "saddleplot.png", full.names=T)
include_graphics(imageList,dpi = 600)
For the simplest comparison we can just count the number of shared boundaries
bdrs <- ins[c('ESC', 'NPC')] %>%
lapply(function(x) {
na.omit(x) %>%
dplyr::filter(boundary_strength_100000 > .5) %>%
mutate(start = start + 1) %>%
makeGRangesFromDataFrame()
})
Reduce(c, bdrs) %>%
unique() %>%
{lapply(bdrs, function(x) overlapsAny(., x))} %>%
bind_cols() %>%
euler() %>%
plot(quantities = T)
These are loops called separately in each sample. There are specific classes in R that handles paired ranges, one of which is GInteractions
lps <- dots[c('ESC', 'NPC')] %>%
lapply(function(x) {
list(x[,1:3], x[,4:6]) %>%
lapply(function(y) {
y %>%
`colnames<-`(c('chr', 'start', 'end')) %>%
mutate(start = start + 1) %>%
makeGRangesFromDataFrame()
}) %>%
{GInteractions(.[[1]], .[[2]], mode = 'reverse')}
})
lps$ESC
## ReverseStrictGInteractions object with 3738 interactions and 0 metadata columns:
## seqnames1 ranges1 seqnames2 ranges2
## <Rle> <IRanges> <Rle> <IRanges>
## [1] chr1 4750001-4775000 --- chr1 4500001-4525000
## [2] chr1 5000001-5025000 --- chr1 4475001-4500000
## [3] chr1 5900001-5925000 --- chr1 4475001-4500000
## [4] chr1 5900001-5925000 --- chr1 4750001-4775000
## [5] chr1 5900001-5925000 --- chr1 4900001-4925000
## ... ... ... ... ... ...
## [3734] chrX 167800001-167825000 --- chrX 167300001-167325000
## [3735] chrX 167800001-167825000 --- chrX 167475001-167500000
## [3736] chrX 168925001-168950000 --- chrX 168400001-168425000
## [3737] chrX 168925001-168950000 --- chrX 168700001-168725000
## [3738] chrX 169275001-169300000 --- chrX 168925001-168950000
## -------
## regions: 5537 ranges and 0 metadata columns
## seqinfo: 20 sequences from an unspecified genome; no seqlengths
lps$NPC
## ReverseStrictGInteractions object with 4394 interactions and 0 metadata columns:
## seqnames1 ranges1 seqnames2 ranges2
## <Rle> <IRanges> <Rle> <IRanges>
## [1] chr1 4750001-4775000 --- chr1 4500001-4525000
## [2] chr1 5900001-5925000 --- chr1 4750001-4775000
## [3] chr1 5900001-5925000 --- chr1 5100001-5125000
## [4] chr1 6125001-6150000 --- chr1 5100001-5125000
## [5] chr1 6125001-6150000 --- chr1 5900001-5925000
## ... ... ... ... ... ...
## [4390] chrX 167225001-167250000 --- chrX 166650001-166675000
## [4391] chrX 167200001-167225000 --- chrX 166800001-166825000
## [4392] chrX 168925001-168950000 --- chrX 167275001-167300000
## [4393] chrX 168925001-168950000 --- chrX 168650001-168675000
## [4394] chrX 169775001-169800000 --- chrX 169325001-169350000
## -------
## regions: 6117 ranges and 0 metadata columns
## seqinfo: 20 sequences from an unspecified genome; no seqlengths
imageList <- list.files("~/Documents/HGEN_663/extra/lec12", pattern= "pileup.png", full.names=T)
include_graphics(imageList,dpi = 600)
diff_loop <- fread("output.pareidolia.tsv")
diff_loop %>% reactable()